set printback=OFF.

TITLE LIS Cross-section Data center in Luxembourg.

TITLE email: usersupport@lisdatacenter.org .

TITLE LIS Self Teaching Package 2022.

TITLE Part II: Gender, employment, and wages.
TITLE SPSS version.

TITLE last change of this version of the syntax: 15-01-2022.

* The exercises in Part II emphasise the use of person-level data, including wages, .
* demographics, and labour market information. Building on the techniques presented .
* in Part I, they introduce regression modelling and continue to lead you through .
* the process of developing a comparative analysis of inequality and poverty across countries.


TITLE Exercise 1: Merging person and household data, selecting a sample.

* Macro for Exercise 1: select the sample, flag home owners and tabulate the flag .
define ex21 ().
* Keep persons aged 25 to 54 whose relation code lies between 1000 and 2200 .
select if (range(age, 25, 54) and range(relation, 1000, 2200)).
* Flag is 1 for own codes 100 to 199 and 0 for own codes 200 to 299 and stays missing otherwise .
if (range(own, 100, 199)) owner = 1.
if (range(own, 200, 299)) owner = 0.
weight by ppopwgt.
frequencies /variables = owner.
!enddefine.

* For each dataset attach the household variables to the person file by hid and run the Exercise 1 macro .
MATCH FILES
  /FILE = us04p
  /TABLE = us04h
  /KEEP = did hid own dname pwgt ppopwgt relation partner ageyoch age sex immigr educ educ_c emp status1 ptime1 hwage1
  /BY hid.
ex21.

MATCH FILES
  /FILE = be04p
  /TABLE = be04h
  /KEEP = did hid own dname pwgt ppopwgt relation partner ageyoch age sex immigr educ educ_c emp status1 ptime1 hwage1
  /BY hid.
ex21.

MATCH FILES
  /FILE = gr04p
  /TABLE = gr04h
  /KEEP = did hid own dname pwgt ppopwgt relation partner ageyoch age sex immigr educ educ_c emp status1 ptime1 hwage1
  /BY hid.
ex21.

TITLE Exercise 2: Stacking data, employment rates by gender.

* Macro for Exercise 2: employment and part-time tables by sex within each dname group .
define ex22 ().
* Keep persons aged 25 to 54 whose relation code lies between 1000 and 2200 .
select if (range(age, 25, 54) and range(relation, 1000, 2200)).
if (range(own, 100, 199)) owner = 1.
if (range(own, 200, 299)) owner = 0.
weight by ppopwgt.
sort cases by dname.
split file by dname.
*frequencies variables = owner.
* Row percentages of emp by sex .
crosstabs /tables = sex by emp /cells = row.
* The selection below is permanent so only cases with emp equal to 1 remain afterwards .
select if (emp eq 1).
crosstabs /tables = sex by ptime1 /cells = row.
!enddefine .

* Build one merged person and household file per dataset and save each of them .
MATCH FILES
  /FILE = us04p
  /TABLE = us04h
  /KEEP = did hid own dname pwgt ppopwgt relation partner ageyoch age sex immigr educ educ_c emp status1 ptime1 hwage1
  /BY hid.
SAVE OUTFILE = "mydata\us04_yid.sav".

MATCH FILES
  /FILE = be04p
  /TABLE = be04h
  /KEEP = did hid own dname pwgt ppopwgt relation partner ageyoch age sex immigr educ educ_c emp status1 ptime1 hwage1
  /BY hid.
SAVE OUTFILE = "mydata\be04_yid.sav".

MATCH FILES
  /FILE = gr04p
  /TABLE = gr04h
  /KEEP = did hid own dname pwgt ppopwgt relation partner ageyoch age sex immigr educ educ_c emp status1 ptime1 hwage1
  /BY hid.
SAVE OUTFILE = "mydata\gr04_yid.sav".

* Stack the three saved files and keep the stacked file for the later exercises .
ADD FILES
  /FILE = "mydata\us04_yid.sav"
  /FILE = "mydata\be04_yid.sav"
  /FILE = "mydata\gr04_yid.sav".
SAVE OUTFILE = "mydata\stackex_yid.sav".
ex22.



TITLE Exercise 3: Family structure and employment.


* Macro for Exercise 3: employment by age group of the youngest child for cases with sex equal to 2 .
define ex23 ().
* Keep persons aged 25 to 54 with relation code 1000 to 2200 and sex code 2 .
select if (range(age, 25, 54) and range(relation, 1000, 2200) and sex eq 2).
weight by ppopwgt.
* Every value outside 0 to 17 including missing falls into category 0 .
recode ageyoch (6 thru 17 = 2) (0 thru 5 = 1) (else = 0) into achildcat.
add value labels achildcat
   0 "no children under 18"
   1 "under 6 years"
   2 "6-17 years".
sort cases by dname partner.
split file by dname partner.
crosstabs /tables = achildcat by emp /cells = row.
!enddefine .

*match files file = us04p    
  /table = us04h   
  /keep=did hid own dname pwgt ppopwgt relation partner ageyoch age sex immigr educ educ_c emp status1 ptime1 hwage1   
  /by hid. 
  
*save outfile = "mydata\us04_yid.sav". 
  
*match files file = be04p    
  /table = be04h   
  /keep=did hid own dname pwgt ppopwgt relation partner ageyoch age sex immigr educ educ_c emp status1 ptime1 hwage1   
  /by hid.   
  
*save outfile = "mydata\be04_yid.sav". 
  
*match files file = gr04p    
  /table = gr04h   
  /keep=did hid own dname pwgt ppopwgt relation partner ageyoch age sex immigr educ educ_c emp status1 ptime1 hwage1   
  /by hid.   
  
*save outfile = "mydata\gr04_yid.sav".   

*add file file = "mydata\us04_yid.sav"   
         /file = "mydata\be04_yid.sav"   
         /file = "mydata\gr04_yid.sav". 
		 
*save outfile = "mydata\stackex_yid.sav".   
* Reload the stacked file saved in Exercise 2 and run the Exercise 3 macro on it .
get file = "mydata\stackex_yid.sav". 
ex23.


TITLE Exercise 4: Dependent employment and hourly wages.

* Macro for Exercise 4: select the sample and derive the dependent employment flag and wage variables .
define dataprepare ().
SET ERRORS OFF.
* Keep persons aged 25 to 54 whose relation code lies between 1000 and 2200 .
select if (range(age, 25, 54) and range(relation, 1000, 2200)).
weight by ppopwgt.
* Flag is 1 for status1 codes 100 to 120 and 0 for codes 200 to 240 and missing otherwise .
compute depemp = $sysmis.
if (range(status1, 100, 120)) depemp = 1.
if (range(status1, 200, 240)) depemp = 0.
* Hourly wage with negative values set to zero .
compute hourwage = hwage1.
if (hwage1 lt 0) hourwage = 0.
* Log hourly wage where an undefined log of a non-missing wage is set to 0 .
compute hourwage_log = ln(hourwage).
EXECUTE.
if (not(missing(hourwage)) and missing(hourwage_log)) hourwage_log = 0.
EXECUTE.
weight by ppopwgt.
!enddefine .

* Macro: request the 25th 50th and 75th percentiles of hourwage_log within each did group .
* The Frequencies Statistics table is captured through OMS into decileratio and merged back by did .
define decilecalc (). 
preserve .  
set tvars names tnumbers values.  
dataset declare decileratio.  
WEIGHT BY ppopwgt. 
sort cases by did.  
split file by did.  
* Route only the Statistics table of the Frequencies command to decileratio in sav format .
OMS      
 / select tables      
 / if command = ['Frequencies'] subtypes=['Statistics']      
 /destination format = sav        outfile = 'decileratio'      
 /columns sequence = [l1 r2] .  
frequencies variables = hourwage_log 
  /percentiles = 25 50 75 
  /format = notable . 
OMSEND. 
weight off. 
restore. 
* Attach the captured statistics to every case as a lookup table keyed on did .
* NOTE review: assumes the OMS file holds the split value in var1 and percentile columns named hourwage_log_25 and hourwage_log_75 - confirm .
match files file = * 
  /table = 'decileratio' 
  /rename (var1 = did) 
  /by did . 
!enddefine .  
 
* Macro: top and bottom code hourwage at 3 times the interquartile range of the log wage .
* Expects hourwage_log_25 and hourwage_log_75 to exist on the active dataset .
define topbottom ().
weight by ppopwgt.
COMPUTE iqr=hourwage_log_75-hourwage_log_25.
EXECUTE.
* Bounds for extreme values on the log scale .
* Each comment ends with a terminator so that it cannot absorb the command that follows .
COMPUTE upper_bound=hourwage_log_75 + (iqr * 3).
EXECUTE.
COMPUTE lower_bound=hourwage_log_25 - (iqr * 3).
EXECUTE.
* Top code the hourly wage at the upper bound .
if hourwage>exp(upper_bound) hourwage=exp(upper_bound).
EXECUTE.
* Bottom code the hourly wage at the lower bound .
if hourwage<exp(lower_bound) hourwage=exp(lower_bound).
EXECUTE.
!enddefine.


*match files file = us04p   
  /table = us04h  
  /keep=did hid own dname pwgt ppopwgt relation partner ageyoch age sex immigr educ educ_c emp status1 ptime1 hwage1  
  /by hid.  
  
*save outfile = "mydata\us04_yid.sav".  

*match files file = be04p   
  /table = be04h  
  /keep=did hid own dname pwgt ppopwgt relation partner ageyoch age sex immigr educ educ_c emp status1 ptime1 hwage1  
  /by hid.  
  
*save outfile = "mydata\be04_yid.sav".  


*match files file = gr04p   
  /table = gr04h  
  /keep=did hid own dname pwgt ppopwgt relation partner ageyoch age sex immigr educ educ_c emp status1 ptime1 hwage1  
  /by hid.  
  
*save outfile = "mydata\gr04_yid.sav". 
 
*add file file = "mydata\us04_yid.sav"  
         /file = "mydata\be04_yid.sav"  
         /file = "mydata\gr04_yid.sav". 
		 
*save outfile = "mydata\stackex_yid.sav".  
* Exercise 4 run: reload the stacked file then prepare and trim wages and produce the tables .
get file = "mydata\stackex_yid.sav".
dataprepare.
decilecalc.
topbottom.
sort cases by dname.
split file by dname.
* Restrict to cases with emp equal to 1 for the next table only .
* The temporary command limits the selection to the next procedure so all cases are available again afterwards .
temporary.
select if emp = 1.
crosstabs sex by depemp /cells = row.
* Median hourly wage by dataset and sex over the full prepared sample .
sort cases by dname sex.
split file by dname sex.
frequencies variables = hourwage /statistics = median /format = notable.



TITLE Exercise 5: Hourly wages, education, and country-specific variables.

* Macro for Exercise 5: select the sample and compare educ_c with educ and derive wage variables .
define dataprepare ().
SET ERRORS OFF.
* Keep persons aged 25 to 54 whose relation code lies between 1000 and 2200 .
select if (range(age, 25, 54) and range(relation, 1000, 2200)).
weight by ppopwgt.
sort cases by dname.
split file by dname.
* Clear the value labels of educ_c before tabulating it .
value labels educ_c.
crosstabs /tables = educ_c by educ.
frequencies /variables = educ_c.
* Hourly wage with negative values set to zero .
compute hourwage = hwage1.
if (hwage1 lt 0) hourwage = 0.
* Log hourly wage where an undefined log of a non-missing wage is set to 0 .
compute hourwage_log = ln(hourwage).
EXECUTE.
if (not(missing(hourwage)) and missing(hourwage_log)) hourwage_log = 0.
EXECUTE.
weight by ppopwgt.
!enddefine .

* Macro: request the 25th 50th and 75th percentiles of hourwage_log within each did group .
* The Frequencies Statistics table is captured through OMS into decileratio and merged back by did .
define decilecalc (). 
preserve .  
set tvars names tnumbers values.  
dataset declare decileratio.  
WEIGHT BY ppopwgt. 
sort cases by did.  
split file by did.  
* Route only the Statistics table of the Frequencies command to decileratio in sav format .
OMS      
 / select tables      
 / if command = ['Frequencies'] subtypes=['Statistics']      
 /destination format = sav        outfile = 'decileratio'      
 /columns sequence = [l1 r2] .  
frequencies variables = hourwage_log 
  /percentiles = 25 50 75 
  /format = notable . 
OMSEND. 
weight off. 
restore. 
* Attach the captured statistics to every case as a lookup table keyed on did .
* NOTE review: assumes the OMS file holds the split value in var1 and percentile columns named hourwage_log_25 and hourwage_log_75 - confirm .
match files file = * 
  /table = 'decileratio' 
  /rename (var1 = did) 
  /by did . 
!enddefine .  
 
* Macro: top and bottom code hourwage at 3 times the interquartile range of the log wage .
* Expects hourwage_log_25 and hourwage_log_75 to exist on the active dataset .
define topbottom ().
WEIGHT by ppopwgt.
COMPUTE iqr=hourwage_log_75-hourwage_log_25.
EXECUTE.
* Bounds for extreme values on the log scale .
* Each comment ends with a terminator so that it cannot absorb the command that follows .
COMPUTE upper_bound=hourwage_log_75 + (iqr * 3).
EXECUTE.
COMPUTE lower_bound=hourwage_log_25 - (iqr * 3).
EXECUTE.
* Top code the hourly wage at the upper bound .
if hourwage>exp(upper_bound) hourwage=exp(upper_bound).
EXECUTE.
* Bottom code the hourly wage at the lower bound .
if hourwage<exp(lower_bound) hourwage=exp(lower_bound).
EXECUTE.
!enddefine .

* match files file = us04p    
  /table = us04h   
  /keep=did hid own dname pwgt ppopwgt relation partner ageyoch age sex immigr educ educ_c emp status1 ptime1 hwage1   
  /by hid.     
* save outfile = "mydata\us04_yid.sav".
   
* match files file = be04p    
  /table = be04h   
  /keep=did hid own dname pwgt ppopwgt relation partner ageyoch age sex immigr educ educ_c emp status1 ptime1 hwage1   
  /by hid.     
* save outfile = "mydata\be04_yid.sav".  
 
* match files file = gr04p    
  /table = gr04h   
  /keep=did hid own dname pwgt ppopwgt relation partner ageyoch age sex immigr educ educ_c emp status1 ptime1 hwage1   
  /by hid.     
* save outfile = "mydata\gr04_yid.sav". 
  
* add file file = "mydata\us04_yid.sav"   
         /file = "mydata\be04_yid.sav"   
         /file = "mydata\gr04_yid.sav".
		 
* save outfile = "mydata\stackex_yid.sav".   
* Exercise 5 run: reload the stacked file then prepare and trim wages and report cell medians .
GET FILE = "mydata\stackex_yid.sav".

dataprepare.
decilecalc.
topbottom.
SORT CASES BY dname.
SPLIT FILE BY dname.
* Add the median hourly wage of each did by educ by sex cell to every case .
AGGREGATE
    /OUTFILE = * MODE = ADDVARIABLES
    /BREAK = did educ sex
    /mededusex = MEDIAN(hourwage).
* Report the cell medians by dataset and education level and sex .
SORT CASES BY dname educ sex.
SPLIT FILE BY dname educ sex.
DESCRIPTIVES /VARIABLES = mededusex.


TITLE Exercise 6: Immigration and wages, understanding harmonisation.


* Macro for Exercise 6: select the sample and derive the wage variables .
define dataprepare ().
SET ERRORS OFF.
* Keep persons aged 25 to 54 whose relation code lies between 1000 and 2200 .
select if (range(age, 25, 54) and range(relation, 1000, 2200)).
weight by ppopwgt.
sort cases by dname.
split file by dname.
* Hourly wage with negative values set to zero .
compute hourwage = hwage1.
if (hwage1 lt 0) hourwage = 0.
* Log hourly wage where an undefined log of a non-missing wage is set to 0 .
compute hourwage_log = ln(hourwage).
EXECUTE.
if (not(missing(hourwage)) and missing(hourwage_log)) hourwage_log = 0.
EXECUTE.
weight by ppopwgt.
!enddefine .

* Macro: request the 25th 50th and 75th percentiles of hourwage_log within each did group .
* The Frequencies Statistics table is captured through OMS into decileratio and merged back by did .
define decilecalc (). 
preserve .  
set tvars names tnumbers values.  
dataset declare decileratio.  
WEIGHT BY ppopwgt. 
sort cases by did.  
split file by did.  
* Route only the Statistics table of the Frequencies command to decileratio in sav format .
OMS      
 / select tables      
 / if command = ['Frequencies'] subtypes=['Statistics']      
 /destination format = sav        outfile = 'decileratio'      
 /columns sequence = [l1 r2] .  
frequencies variables = hourwage_log 
  /percentiles = 25 50 75 
  /format = notable . 
OMSEND. 
weight off. 
restore. 
* Attach the captured statistics to every case as a lookup table keyed on did .
* NOTE review: assumes the OMS file holds the split value in var1 and percentile columns named hourwage_log_25 and hourwage_log_75 - confirm .
match files file = * 
  /table = 'decileratio' 
  /rename (var1 = did) 
  /by did . 
!enddefine .  
 
* Macro: top and bottom code hourwage at 3 times the interquartile range of the log wage .
* Expects hourwage_log_25 and hourwage_log_75 to exist on the active dataset .
define topbottom ().
WEIGHT by ppopwgt.
COMPUTE iqr=hourwage_log_75-hourwage_log_25.
EXECUTE.
* Bounds for extreme values on the log scale .
* Each comment ends with a terminator so that it cannot absorb the command that follows .
COMPUTE upper_bound=hourwage_log_75 + (iqr * 3).
EXECUTE.
COMPUTE lower_bound=hourwage_log_25 - (iqr * 3).
EXECUTE.
* Top code the hourly wage at the upper bound .
if hourwage>exp(upper_bound) hourwage=exp(upper_bound).
EXECUTE.
* Bottom code the hourly wage at the lower bound .
if hourwage<exp(lower_bound) hourwage=exp(lower_bound).
EXECUTE.
!enddefine .

*match files file = us04p    
  /table = us04h   
  /keep=did hid own dname pwgt ppopwgt relation partner ageyoch age sex immigr educ educ_c emp status1 ptime1 hwage1   
  /by hid.     
*save outfile = "mydata\us04_yid.sav".
   
*match files file = be04p    
  /table = be04h   
  /keep=did hid own dname pwgt ppopwgt relation partner ageyoch age sex immigr educ educ_c emp status1 ptime1 hwage1   
  /by hid.     
*save outfile = "mydata\be04_yid.sav".  
 
*match files file = gr04p    
  /table = gr04h   
  /keep=did hid own dname pwgt ppopwgt relation partner ageyoch age sex immigr educ educ_c emp status1 ptime1 hwage1   
  /by hid.     
*save outfile = "mydata\gr04_yid.sav". 
  
*add file file = "mydata\us04_yid.sav"   
         /file = "mydata\be04_yid.sav"   
         /file = "mydata\gr04_yid.sav".
		 
*save outfile = "mydata\stackex_yid.sav".   
* Exercise 6 run: reload the stacked file then prepare and trim wages and report cell medians .
GET FILE = "mydata\stackex_yid.sav".

dataprepare.
decilecalc.
topbottom.
SORT CASES BY dname.
SPLIT FILE BY dname.
* Add the median hourly wage of each did by immigr by sex cell to every case .
AGGREGATE
    /OUTFILE = * MODE = ADDVARIABLES
    /BREAK = did immigr sex
    /medimmsex = MEDIAN(hourwage).
* Report the cell medians by dataset and immigr code and sex .
SORT CASES BY dname immigr sex.
SPLIT FILE BY dname immigr sex.
DESCRIPTIVES /VARIABLES = medimmsex.



TITLE Exercise 7: Wage regressions.


* Macro for Exercise 7: select the sample and derive the regression covariates and wage variables .
define dataprepare ().
* Keep persons aged 25 to 54 whose relation code lies between 1000 and 2200 .
select if (range(age, 25, 54) and range(relation, 1000, 2200)).
weight by ppopwgt.
* Flag is 1 for own codes 100 to 199 and 0 for own codes 200 to 299 and stays missing otherwise .
if (range(own, 100, 199)) homeowner = 1.
if (range(own, 200, 299)) homeowner = 0.
* Every value outside 0 to 17 including missing falls into category 0 .
recode ageyoch (6 thru 17 = 2) (0 thru 5 = 1) (else = 0) into achildcat.
add value labels achildcat
   0 "no children under 18"
   1 "under 6 years"
   2 "6-17 years" .
* Flag is 1 for status1 codes 100 to 120 and 0 for codes 200 to 240 and missing otherwise .
compute depemp = $sysmis.
if (range(status1, 100, 120)) depemp = 1.
if (range(status1, 200, 240)) depemp = 0.
* Hourly wage with negative values set to zero .
compute hourwage = hwage1.
if (hwage1 lt 0) hourwage = 0.
* Log hourly wage where an undefined log of a non-missing wage is set to 0 .
compute hourwage_log = ln(hourwage).
EXECUTE.
if (not(missing(hourwage)) and missing(hourwage_log)) hourwage_log = 0.
EXECUTE.
weight by ppopwgt.
!enddefine .

* Macro: request the 25th 50th and 75th percentiles of hourwage_log within each did group .
* The Frequencies Statistics table is captured through OMS into decileratio and merged back by did .
define decilecalc (). 
preserve .  
set tvars names tnumbers values.  
dataset declare decileratio.  
WEIGHT BY ppopwgt. 
sort cases by did.  
split file by did.  
* Route only the Statistics table of the Frequencies command to decileratio in sav format .
OMS      
 / select tables      
 / if command = ['Frequencies'] subtypes=['Statistics']      
 /destination format = sav        outfile = 'decileratio'      
 /columns sequence = [l1 r2] .  
frequencies variables = hourwage_log 
  /percentiles = 25 50 75 
  /format = notable . 
OMSEND. 
weight off. 
restore. 
* Attach the captured statistics to every case as a lookup table keyed on did .
* NOTE review: assumes the OMS file holds the split value in var1 and percentile columns named hourwage_log_25 and hourwage_log_75 - confirm .
match files file = * 
  /table = 'decileratio' 
  /rename (var1 = did) 
  /by did . 
!enddefine .  
 
* Macro: top and bottom code hourwage at 3 times the interquartile range of the log wage .
* Expects hourwage_log_25 and hourwage_log_75 to exist on the active dataset .
define topbottom ().
WEIGHT by ppopwgt.
COMPUTE iqr=hourwage_log_75-hourwage_log_25.
EXECUTE.
* Bounds for extreme values on the log scale .
* Each comment ends with a terminator so that it cannot absorb the command that follows .
COMPUTE upper_bound=hourwage_log_75 + (iqr * 3).
EXECUTE.
COMPUTE lower_bound=hourwage_log_25 - (iqr * 3).
EXECUTE.
* Top code the hourly wage at the upper bound .
if hourwage>exp(upper_bound) hourwage=exp(upper_bound).
EXECUTE.
* Bottom code the hourly wage at the lower bound .
if hourwage<exp(lower_bound) hourwage=exp(lower_bound).
EXECUTE.
!enddefine .



* Macro for Exercise 7: build the log wage and the squared age and the child and education dummies .
define ex27 ().
compute logwage = ln(hourwage).
compute agesq = age * age.
* Child dummies are set only when achildcat is 0 or 1 or 2 and stay missing otherwise .
do if (any(achildcat, 0, 1, 2)).
compute youngchild = (achildcat eq 1).
compute oldchild = (achildcat eq 2).
end if.
* Education dummies are set only when educ is 1 or 2 or 3 and stay missing otherwise .
do if (any(educ, 1, 2, 3)).
compute mededuc = (educ eq 2).
compute hieduc = (educ eq 3).
end if.
!enddefine .




*match files file = us04p   
  /table = us04h  
  /keep=did hid own dname pwgt ppopwgt relation partner ageyoch age sex immigr educ educ_c emp status1 ptime1 hwage1  
  /by hid.  
  
*save outfile = "mydata\us04_yid.sav".
  
*match files file = be04p   
  /table = be04h  
  /keep=did hid own dname pwgt ppopwgt relation partner ageyoch age sex immigr educ educ_c emp status1 ptime1 hwage1  
  /by hid.  
  
*save outfile = "mydata\be04_yid.sav".  

*match files file = gr04p   
  /table = gr04h  
  /keep=did hid own dname pwgt ppopwgt relation partner ageyoch age sex immigr educ educ_c emp status1 ptime1 hwage1  
  /by hid.  
  
*save outfile = "mydata\gr04_yid.sav".  

*add file file = "mydata\us04_yid.sav"  
         /file = "mydata\be04_yid.sav"  
         /file = "mydata\gr04_yid.sav".  
		 
*save outfile = "mydata\stackex_yid.sav".  
* Exercise 7 run: reload the stacked file and prepare the data then fit one wage regression per dataset and sex .
GET FILE = "mydata\stackex_yid.sav".
dataprepare.
decilecalc.
topbottom.
ex27.
SORT CASES BY dname sex.
SPLIT FILE BY dname sex.
* Case weighting is switched off because the regression takes ppopwgt as its own regression weight .
WEIGHT OFF.
REGRESSION
  /REGWGT = ppopwgt
  /DEPENDENT = logwage
  /METHOD = ENTER age agesq mededuc hieduc immigr partner youngchild oldchild ptime1 homeowner.
		   
		   
 
 
TITLE Exercise 8: Pooled regressions and normalised weights.

* Macro for Exercise 8: select the sample and derive the regression covariates and wage variables .
define dataprepare ().
* Keep persons aged 25 to 54 whose relation code lies between 1000 and 2200 .
select if (range(age, 25, 54) and range(relation, 1000, 2200)).
weight by ppopwgt.
* Flag is 1 for own codes 100 to 199 and 0 for own codes 200 to 299 and stays missing otherwise .
if (range(own, 100, 199)) homeowner = 1.
if (range(own, 200, 299)) homeowner = 0.
* Every value outside 0 to 17 including missing falls into category 0 .
recode ageyoch (6 thru 17 = 2) (0 thru 5 = 1) (else = 0) into achildcat.
add value labels achildcat
   0 "no children under 18"
   1 "under 6 years"
   2 "6-17 years" .
* Flag is 1 for status1 codes 100 to 120 and 0 for codes 200 to 240 and missing otherwise .
compute depemp = $sysmis.
if (range(status1, 100, 120)) depemp = 1.
if (range(status1, 200, 240)) depemp = 0.
* Hourly wage with negative values set to zero .
compute hourwage = hwage1.
if (hwage1 lt 0) hourwage = 0.
* Log hourly wage where an undefined log of a non-missing wage is set to 0 .
compute hourwage_log = ln(hourwage).
EXECUTE.
if (not(missing(hourwage)) and missing(hourwage_log)) hourwage_log = 0.
EXECUTE.
weight by ppopwgt.
!enddefine .

* Macro: request the 25th 50th and 75th percentiles of hourwage_log within each did group .
* The Frequencies Statistics table is captured through OMS into decileratio and merged back by did .
define decilecalc (). 
preserve .  
set tvars names tnumbers values.  
dataset declare decileratio.  
WEIGHT BY ppopwgt. 
sort cases by did.  
split file by did.  
* Route only the Statistics table of the Frequencies command to decileratio in sav format .
OMS      
 / select tables      
 / if command = ['Frequencies'] subtypes=['Statistics']      
 /destination format = sav        outfile = 'decileratio'      
 /columns sequence = [l1 r2] .  
frequencies variables = hourwage_log 
  /percentiles = 25 50 75 
  /format = notable . 
OMSEND. 
weight off. 
restore. 
* Attach the captured statistics to every case as a lookup table keyed on did .
* NOTE review: assumes the OMS file holds the split value in var1 and percentile columns named hourwage_log_25 and hourwage_log_75 - confirm .
match files file = * 
  /table = 'decileratio' 
  /rename (var1 = did) 
  /by did . 
!enddefine .  
 
* Macro: top and bottom code hourwage at 3 times the interquartile range of the log wage .
* Expects hourwage_log_25 and hourwage_log_75 to exist on the active dataset .
define topbottom ().
WEIGHT by ppopwgt.
COMPUTE iqr=hourwage_log_75-hourwage_log_25.
EXECUTE.
* Bounds for extreme values on the log scale .
* Each comment ends with a terminator so that it cannot absorb the command that follows .
COMPUTE upper_bound=hourwage_log_75 + (iqr * 3).
EXECUTE.
COMPUTE lower_bound=hourwage_log_25 - (iqr * 3).
EXECUTE.
* Top code the hourly wage at the upper bound .
if hourwage>exp(upper_bound) hourwage=exp(upper_bound).
EXECUTE.
* Bottom code the hourly wage at the lower bound .
if hourwage<exp(lower_bound) hourwage=exp(lower_bound).
EXECUTE.
!enddefine .



* Macro for Exercise 8: regression covariates plus wages divided by a per-dataset ppp factor and country dummies .
define ex28 ().
compute logwage = ln(hourwage).
compute agesq = age * age.
* Child dummies are set only when achildcat is 0 or 1 or 2 and stay missing otherwise .
do if (any(achildcat, 0, 1, 2)).
compute youngchild = (achildcat eq 1).
compute oldchild = (achildcat eq 2).
end if.
* Education dummies are set only when educ is 1 or 2 or 3 and stay missing otherwise .
do if (any(educ, 1, 2, 3)).
compute mededuc = (educ eq 2).
compute hieduc = (educ eq 3).
end if.
* The ppp factor is assigned by dataset name and stays missing for any other dataset .
compute ppp = $sysmis.
if (dname eq "us04") ppp = 1.
if (dname eq "be04") ppp = 0.86.
if (dname eq "gr04") ppp = 0.65.
compute hourwage_ppp = hourwage / ppp.
compute logwage_ppp = ln(hourwage_ppp).
* Country dummies with us04 as the omitted category .
compute belgium = 0.
if (dname eq "be04") belgium = 1.
compute greece = 0.
if (dname eq "gr04") greece = 1.
!enddefine .




*match files file = us04p   
  /table = us04h  
  /keep=did hid own dname pwgt ppopwgt relation partner ageyoch age sex immigr educ educ_c emp status1 ptime1 hwage1  
  /by hid.  
  
*save outfile = "mydata\us04_yid.sav".
  
*match files file = be04p   
  /table = be04h  
  /keep=did hid own dname pwgt ppopwgt relation partner ageyoch age sex immigr educ educ_c emp status1 ptime1 hwage1  
  /by hid.  
  
*save outfile = "mydata\be04_yid.sav".  

*match files file = gr04p   
  /table = gr04h  
  /keep=did hid own dname pwgt ppopwgt relation partner ageyoch age sex immigr educ educ_c emp status1 ptime1 hwage1  
  /by hid.  
  
*save outfile = "mydata\gr04_yid.sav".  

*add file file = "mydata\us04_yid.sav"  
         /file = "mydata\be04_yid.sav"  
         /file = "mydata\gr04_yid.sav".  
		 
*save outfile = "mydata\stackex_yid.sav".  
* Exercise 8 run: reload the stacked file and prepare the data then fit one pooled wage regression per sex .
* NOTE review: the exercise title mentions normalised weights but the regression is weighted by ppopwgt as is - confirm whether a normalised weight was intended .
GET FILE = "mydata\stackex_yid.sav".
dataprepare.
decilecalc.
topbottom.
ex28.
SORT CASES BY sex.
SPLIT FILE BY sex.
* Case weighting is switched off because the regression takes ppopwgt as its own regression weight .
WEIGHT OFF.
REGRESSION
  /REGWGT = ppopwgt
  /DEPENDENT = logwage_ppp
  /METHOD = ENTER age agesq mededuc hieduc immigr partner youngchild oldchild ptime1 homeowner belgium greece.
	   